require 'config/environment'

# Builds a delimited "kanji, level" list from a directory of HTML pages.
#
# Environment variables:
#   dir - directory holding the *.html pages (required)
#   out - path of the file to write (required)
#   rs  - column separator used in the output (optional, defaults to a tab)
#
# The first integer found in a page's file name is taken as that page's level.
# On every line, the first <td class="kanji">...</td> cell is collected into
# the set for that level. Levels are written in ascending order, one
# [kanji, level] row per kanji; for every level except the last one, kanji
# that also appear in the next level up are left out.
#
# Raises RuntimeError when dir/out are missing or when a page's file name
# contains no integer.
task :make_kanji_list do
  raise "IO Error: Please specify html directry (UTF-8)" unless ENV.key?("dir")
  raise "IO Error: Please specify output filename" unless ENV.key?("out")

  require "set"
  require "csv"

  results = Hash.new { |hash, key| hash[key] = Set.new }

  # read data
  # Sorted so the row order is stable when several pages share a level.
  Dir.glob("#{File.expand_path(ENV["dir"])}/*.html").sort.each do |filename|
    # Only the file name carries the level; digits in parent directories
    # must not be mistaken for it.
    level_match = /\d+/.match(File.basename(filename))
    raise "Filename Error: Please Filename include integer of Kanji Level" if level_match.nil?
    level = level_match[0].to_i

    # The pages are expected to be UTF-8, so do not depend on the locale.
    File.open(filename, "r:UTF-8") do |file|
      file.each_line do |line|
        cell = /<td class="kanji">(.+?)<\/td>/.match(line)
        results[level] << cell[1] if cell
      end
    end
  end

  # save data
  levels = results.keys.sort
  separator = ENV.fetch("rs", "\t")
  # CSV.open takes its options as keywords; the separator between the two
  # columns is :col_sep. The block form closes the file even on errors.
  CSV.open(ENV["out"], "w", col_sep: separator) do |csv|
    levels.each_with_index do |level, index|
      following = levels[index + 1]
      data = following ? results[level] - results[following] : results[level]
      data.each { |item| csv << [item, level] }
    end
  end
end